library(janitor)
library(readODS)
library(httr)
library(here)
library(data.table)
library(sf)
library(leaflet)
library(leaflet.extras)
library(tidyverse)
# loading in shape file 
uk_shape_file <- st_read(here("raw_data/LA_shape/Local_Authority_Districts__April_2019__UK_BFE_v2.shp")) %>%
  clean_names() %>% 
  st_simplify(dTolerance = 1000) %>%
  st_transform("+proj=longlat +datum=WGS84") %>% 
  dplyr::select(lad19cd, long, lat, geometry) 
# Set colours and bins
pal <- colorBin("Greens", domain = uk_ev_map$x2021_q1, bins = c(0, 500, 1000, 2500, 5000, 10000, 15000))

# Set labels
uk_ev_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Electric Vehicles",
  uk_ev_map$region_local_authority_apr_2019_3, uk_ev_map$x2021_q1) %>% 
  lapply(htmltools::HTML)
# Geospatial of EV Vehicles in the UK 2021 Q1
uk_ev_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(x2021_q1),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = uk_ev_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~x2021_q1, opacity = 0.7, title = NULL,
            position = "bottomright")

How many electric vehicles are on the road across the UK by LA?

# Reading in and skipping first 5 rows
uk_ev_postcode <- read_ods(here("raw_data/ev_by_postcode.ods"), sheet = 2, skip = 6) %>% 
  rename("postcode" = PostcodeDistrict2)
Error: Can't rename columns that don't exist.
x Column `PostcodeDistrict2` doesn't exist.
Run `rlang::last_error()` to see where the error occurred.
# loading in shape file 
uk_shape_file_postcode <- st_read(here("raw_data/postcode_shape/EX_Sample.shp")) %>%
  clean_names() %>% 
  st_simplify(dTolerance = 1000) %>%
  st_transform("+proj=longlat +datum=WGS84") #%>% 
#  select(lad19cd, long, lat, geometry) 
# Joining uk_ev + shape file 
uk_ev_map_postcode <- uk_ev_postcode %>% 
  left_join(uk_shape_file_postcode, by = c("ons_la_code_apr_2019" = "lad19cd")) %>% 
  drop_na() %>% 
  mutate(across(c(x2021_q1:x2011_q4), as.numeric)) %>% 
  st_as_sf()
    pal <- colorBin("Greens", domain = uk_ev_map$x2021_q1, bins = c(0, 500, 1000, 2500, 5000, 10000, 15000))
    
    uk_ev_map_labels <- sprintf(
      "<strong>%s</strong><br/>%g Electric Vehicles",
      uk_ev_map$region_local_authority_apr_2019_3, uk_ev_map$x2021_q1) %>% 
      lapply(htmltools::HTML)
# Geospatial of EV Vehicles in the UK 2021 Q1
uk_ev_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(x2021_q1),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = uk_ev_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~x2021_q1, opacity = 0.7, title = NULL,
            position = "bottomright")
# Wrangling to create an EV count over time plot 
uk_ev_longer <- uk_ev %>%
  # Pivot longer to get year and count columns
  pivot_longer(cols = c(x2021_q1:x2011_q4), names_to = c("year"), values_to = "no_of_ev") %>% 
  # Filter so we only have UK as a whole data AND we only want final numbers of the year so Q4 
  filter(region_local_authority_apr_2019_3 == "United Kingdom" & str_detect(year, "q4")) %>% 
  # Simplify to just show year
  mutate(year = case_when(str_detect(year, "2021") ~ "2021",
         str_detect(year, "2020") ~ "2020",
         str_detect(year, "2019") ~ "2019",
         str_detect(year, "2018") ~ "2018",
         str_detect(year, "2017") ~ "2017",
         str_detect(year, "2016") ~ "2016",
         str_detect(year, "2015") ~ "2015",
         str_detect(year, "2014") ~ "2014",
         str_detect(year, "2013") ~ "2013",
         str_detect(year, "2012") ~ "2012",
         str_detect(year, "2011") ~ "2011"),
         year = as.numeric(year),
         no_of_ev = as.numeric(no_of_ev))

Row binding grid NO2 data

no2_2010 <- read_csv(here("raw_data/no2_by_grid_2010.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2011 <- read_csv(here("raw_data/no2_by_grid_2011.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2012 <- read_csv(here("raw_data/no2_by_grid_2012.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2013 <- read_csv(here("raw_data/no2_by_grid_2013.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2014 <- read_csv(here("raw_data/no2_by_grid_2014.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2015 <- read_csv(here("raw_data/no2_by_grid_2015.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2016 <- read_csv(here("raw_data/no2_by_grid_2016.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2017 <- read_csv(here("raw_data/no2_by_grid_2017.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2018 <- read_csv(here("raw_data/no2_by_grid_2018.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2019 <- read_csv(here("raw_data/no2_by_grid_2019.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))

# Create empty data frame
no2_all <- data_frame()

# For each year, bind rows to one dataset
for (i in 2010:2019) {
  df_name <- paste0("no2_", i)
  df_input <- as.name(df_name)
  
  df <- eval(df_input) %>% 
    mutate(year = i)
  
no2_all <- bind_rows(no2_all, df)
  }
# Remove missing NO2 grid values
no2_clean <- no2_all %>% 
  filter(no2 != "MISSING")
library(proj4)
proj4string <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

no2_clean_row <- no2_clean %>% 
  rowid_to_column()

# Source data
xy <- no2_clean_row %>% 
  select(x, y, rowid)

# Transformed data
pj <- project(xy, proj4string, inverse=TRUE)
Warning in project(xy, proj4string, inverse = TRUE) :
  more than two dimensions found, using first two
latlon <- data.frame(xy, lat=pj$y, lon=pj$x)
final <-  merge(no2_clean_row, latlon, by.x = "rowid", by.y = "rowid") %>%
  filter(year == 2019) %>% 
  select(lat, lon, no2) 
final <- final %>% 
  mutate(no2 = as.numeric(no2)) 
bins <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
pal <- colorBin("Spectral", domain = final$no2, bins = bins, na.color = "transparent", reverse = TRUE)

leaflet() %>%
  addProviderTiles("CartoDB.Positron", options = providerTileOptions(noWrap = TRUE)) %>%
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addHeatmap(data = final,
             lng = ~lon,
             lat = ~lat,
             intensity = ~no2,
             minOpacity = 0.1,
             max = 45,
             radius = 1,
             blur = 1) %>% 
  addLegend(pal = pal, values = final$no2,
                title="Average NO2 Conc")
no2_annual_mean <- read_ods(here("raw_data/NO2_tables.ods"), sheet = 3, skip = 2) %>% 
  clean_names()
no2_annual_mean_all <- no2_annual_mean %>% 
  filter(site == "All sites") %>% 
  mutate(year = as.numeric(year))
no2_annual_mean_all %>% 
  ggplot() +
  aes(x = year, y = annual_mean_no2concentration_mg_m3) + 
  geom_point() +
  geom_smooth(se = FALSE) +
  theme_minimal() +
  scale_y_continuous(breaks = seq(0, 70, 10), limits = c(0, 70)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
  labs(x = "\nYear\n", 
       y = "\nAnnual Mean NO2 Conc mg m3\n",
       title = "\nNO2 over time in the UK\n") 
`geom_smooth()` using method = 'loess' and formula 'y ~ x'

library(tidyverse)
library(fable)
library(tsibble)
library(tsibbledata)
library(ggfortify)
forecast_no2 <- no2_annual_mean_all %>% 
  dplyr::select(year, annual_mean_no2concentration_mg_m3) %>% 
  tsibble(index = year)
autoplot(forecast_no2)
Plot variable not specified, automatically selected `.vars = annual_mean_no2concentration_mg_m3`

fit <- forecast_no2 %>%
  model(
    arima = ARIMA(annual_mean_no2concentration_mg_m3)
  )
fit
forecast_1 <- fit %>%
  fabletools::forecast(h = "15 years")
forecast_1
forecast_1 %>%
  autoplot(forecast_no2, level = NULL) +
  ggtitle("Forecasts for NO2 Conc over time") +
  xlab("Year") +
  guides(colour = guide_legend(title = "Forecast")) +
  scale_y_continuous(limit = c(0, 65))
Warning: Removed 1 row(s) containing missing values (geom_path).

# Now set our training data from 1992 to 2006
train <- forecast_no2 %>%
  filter(year <= 2017)

# run the model on the training set 
fit_train <- train %>%
  model(
    arima = ARIMA(annual_mean_no2concentration_mg_m3)
  )
# forecast from the training set
forecast_test <- fit_train %>% 
  fabletools::forecast(h = "4 years")

# Plot forecasts against actual values
forecast_test %>%
  autoplot(train, level = NULL) +
    autolayer(filter(forecast_no2, year >= 2017), color = "black") +
    ggtitle("Forecasts for NO2 Conc over time") +
    xlab("Year") + ylab("Megalitres") +
    guides(colour=guide_legend(title="Forecast"))
Plot variable not specified, automatically selected `.vars = annual_mean_no2concentration_mg_m3`

# Binding 2014 and 2019
no2_2014 <- read_csv(here("raw_data/no2_by_grid_2014.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2")) %>% 
  add_column(year = "2014")
Rows: 281802 Columns: 4
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_2019 <- read_csv(here("raw_data/no2_by_grid_2019.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2")) %>% 
  add_column(year = "2019")
Rows: 281802 Columns: 4
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): no2
dbl (3): uk_grid_code, x, y

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
no2_difference <- bind_rows(no2_2014, no2_2019)
# Diff in NO2 between 2014 & 2019
no2_difference <- no2_difference %>% 
  filter(no2 != "MISSING") %>% 
  mutate(no2 = as.numeric(no2),
         year = as.numeric(year)) %>% 
  group_by(x, y) %>% 
  pivot_wider(names_from = "year", values_from = "no2") %>% 
  rename("x2014" = "2014",
         "x2019" = "2019") %>% 
  mutate(no2_diff_2014_2019 = x2014 - x2019)  %>% 
#Remove this separation when put in cleaning script
  ungroup() %>% 
#Remove this separation when put in cleaning script
  drop_na() %>% 
#Remove this separation when put in cleaning script
# Changing all negative numbers to zero as these aren't of interest to us
  mutate(no2_diff_2014_2019 = if_else(no2_diff_2014_2019 < 0, 0, no2_diff_2014_2019))
# Converting NO2 diff from x y to lat long
library(proj4)
proj4string <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

no2_diff_row <- no2_difference %>% 
  rowid_to_column()

# Source data
xy <- no2_diff_row %>% 
  dplyr::select(x, y, rowid)

# Transformed data
pj <- project(xy, proj4string, inverse=TRUE)
Warning in project(xy, proj4string, inverse = TRUE) :
  more than two dimensions found, using first two
latlon <- data.frame(xy, lat=pj$y, lon=pj$x)
no2_diff_final <-  merge(no2_diff_row, latlon, by.x = "rowid", by.y = "rowid") %>%
  dplyr::select(lat, lon, no2_diff_2014_2019) 
# Geospatial of No2 diff 2014 to 2019
bins <- c(0, 5, 10, 15, 20, 25, 30)
pal <- colorBin("Spectral", domain = no2_diff_final$no2_diff_2014_2019, bins = bins, na.color = "transparent", reverse = TRUE)

leaflet() %>%
  addProviderTiles("CartoDB.Positron", options = providerTileOptions(noWrap = TRUE)) %>%
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addHeatmap(data = no2_diff_final,
             lng = ~lon,
             lat = ~lat,
             intensity = ~no2_diff_2014_2019,
             minOpacity = 0.1,
             max = 30,
             radius = 1,
             blur = 2) %>% 
  addLegend(pal = pal, values = no2_diff_final$no2_diff_2014_2019,
                title="Annual Mean NO2 Change between 2014 and 2019")
no2_difference %>% 
  ggplot() +
  geom_histogram(aes(x = no2_diff_2014_2019))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ev_charging <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_02-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  dplyr::select(year, month, total_devices, rapid_devices) %>% 
  filter(rapid_devices != "NA") %>% 
  mutate(total_devices = as.numeric(total_devices),
         other_devices = total_devices - rapid_devices) %>% 
  select(-total_devices) %>% 
  pivot_longer(cols = c(other_devices, rapid_devices), names_to = "charger", values_to = "number_of_chargers") %>% 
  filter(month == "October" | month == "July" & year == "2021") %>% 
  mutate(charger = factor(charger, levels = c("rapid_devices", "other_devices")))

ev_charging_rapid <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_01b-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  select(la_region_code, local_authority_region_name, x6) %>% 
  rename("count_rapid" = x6)
ev_charging_total <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_01a-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  select(la_region_code, local_authority_region_name, x6) %>% 
  rename("count_total" = x6)

ev_charging_geo <- inner_join(ev_charging_rapid, ev_charging_total, by = "la_region_code") %>% 
  drop_na() %>% 
  select(la_region_code, local_authority_region_name.x, count_rapid, count_total) %>% 
  filter(str_detect(local_authority_region_name.x, "[a-z][a-z]"), 
         count_rapid != "-") %>% 
  mutate(local_authority_region_name.x = str_remove_all(local_authority_region_name.x, "[(](Met County)[)]"),
         local_authority_region_name.x = str_remove_all(local_authority_region_name.x, "[(]abolished \\w+ \\d+[)]"))
# Joining ev_charging_geo + shape file 
ev_charging_map <- ev_charging_geo %>% 
  inner_join(uk_shape_file, by = c("la_region_code" = "lad19cd")) %>% 
  mutate(across(c(count_rapid:count_total), as.numeric)) %>% 
  st_as_sf()
# Set colours and bins
pal <- colorBin("Reds", domain = ev_charging_map$count_total, bins = 10)

# Set labels
ev_charging_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Chargers",
  ev_charging_map$local_authority_region_name.x, ev_charging_map$count_total, ev_charging_map$count_rapid) %>% 
  lapply(htmltools::HTML)
# Geospatial of EV Total Chargers in the UK April 21
ev_charging_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(count_total),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = ev_charging_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~count_total, opacity = 0.7, title = NULL,
            position = "bottomright")
# Set colours and bins
pal <- colorBin("Reds", domain = ev_charging_map$count_rapid, bins = 10)

# Set labels
ev_charging_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Chargers",
  ev_charging_map$local_authority_region_name.x, ev_charging_map$count_rapid, ev_charging_map$count_total) %>% 
  lapply(htmltools::HTML)
# Geospatial of EV Rapid Chargers per 100k people in the UK April 21
ev_charging_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(count_rapid),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = ev_charging_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~count_rapid, opacity = 0.7, title = NULL,
            position = "bottomright")
# st_join seems less dirty
ev_no2 <- final %>% 
  st_as_sf(coords = c("lon", "lat"), crs = st_crs(highlands)) %>% 
  st_join(uk_ev_map_2021, join = st_intersects, left = FALSE) group_by(region_local_authority_apr_2019_3) %>% 
Error: unexpected symbol in:
"  st_as_sf(coords = c("lon", "lat"), crs = st_crs(highlands)) %>% 
  st_join(uk_ev_map_2021, join = st_intersects, left = FALSE) group_by"
ev_no2 %>% 
  ggplot() +
  aes(x = x2021_q1, y = mean_no2) +
  geom_point() +
  geom_smooth(se = TRUE)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

population <- read_csv(here("raw_data/ukpopestimatesmid2020on2021geography/MYE2 - Persons-Table 1.csv"), skip = 7) %>% 
  clean_names() %>% 
  select(code, name, all_ages) %>% 
  mutate(pop_per_100k = all_ages/100000)
New names:
* `` -> ...96
* `` -> ...97
* `` -> ...98
* `` -> ...99
* `` -> ...100
Rows: 420 Columns: 100
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Code, Name, Geography
lgl (5): ...96, ...97, ...98, ...99, ...100

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ev_pop_100k %>% 
  ggplot() +
  aes(x = ev_per_100k, y = mean_no2) +
  geom_point() +
  geom_smooth(se = TRUE)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
Warning: Removed 11 rows containing non-finite values (stat_smooth).
Warning: Removed 11 rows containing missing values (geom_point).

library(cluster)
library(factoextra)
ev_scale <- ev_pop_100k %>% 
  select(region_local_authority_apr_2019_3, x2021_q1, mean_no2) %>% 
  mutate_if(is.numeric, scale)
ev_pop_100k_scale <- ev_pop_100k %>% 
  select(region_local_authority_apr_2019_3, ev_per_100k, mean_no2) %>% 
  mutate_if(is.numeric, scale)
ev_scale %>% 
  ggplot() +
  aes(x = x2021_q1, y = mean_no2) +
  geom_point() +
  geom_smooth(method = lm)
`geom_smooth()` using formula 'y ~ x'

ev_pop_100k_scale %>% 
  ggplot() +
  aes(x = ev_per_100k, y = mean_no2) +
  geom_point() +
  geom_smooth(method = lm)
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 11 rows containing non-finite values (stat_smooth).
Warning: Removed 11 rows containing missing values (geom_point).

library(corrplot)
corrplot 0.90 loaded

car_by_fuel_gb <- read_ods(here("raw_data/car_by_fuel_by_year_ALL_UK.ods"), skip = 7) %>% 
  clean_names() %>% 
  head(58) %>% 
  mutate(across(c(petrol:zero_emission8), as.numeric)) %>% 
  filter(petrol >= 100) %>% 
  mutate(traditional_fuel = total - alternative_fuels7 - zero_emission8) %>% 
  select(c("year", "petrol", "diesel", "alternative_fuels7", "zero_emission8")) %>% 
  pivot_longer(cols = c(petrol:zero_emission8), names_to = c("fuel_type"), values_to = c("count"))
car_by_fuel_gb %>% 
  ggplot() +
  aes(x = year, y = count, colour = fuel_type) +
  geom_smooth(method = lm) +
  geom_point() 
`geom_smooth()` using formula 'y ~ x'

new_car_by_fuel_gb <- read_ods(here("raw_data/new_car_by_fuel.ods"), skip = 7) %>% 
  clean_names() %>% 
  head(21) %>% 
  mutate(across(c(petrol:zero_emission8), as.numeric)) %>% 
  filter(petrol >= 100) %>% 
  mutate(traditional_fuel = total - alternative_fuels7 - zero_emission8) %>% 
  select(c("date", "petrol", "diesel", "alternative_fuels7", "zero_emission8")) %>% 
  pivot_longer(cols = c(petrol:zero_emission8), names_to = c("fuel_type"), values_to = c("count"))

---
title: "R Notebook"
output: html_notebook
---

```{r}
library(janitor)
library(readODS)
library(httr)
library(here)
library(data.table)
library(sf)
library(leaflet)
library(leaflet.extras)
library(tidyverse)
```

```{r}
# loading in shape file 
uk_shape_file <- st_read(here("raw_data/LA_shape/Local_Authority_Districts__April_2019__UK_BFE_v2.shp")) %>%
  clean_names() %>% 
  st_simplify(dTolerance = 1000) %>%
  st_transform("+proj=longlat +datum=WGS84") %>% 
  dplyr::select(lad19cd, long, lat, geometry) 
```

```{r}
# Load in clean Electric Vehicles by Local Authority data
uk_ev <- read_csv(here("clean_data/ev_by_la_clean.csv"))
# Joining uk_ev + shape file 
uk_ev_map <- uk_ev %>% 
  left_join(uk_shape_file, by = c("ons_la_code_apr_2019" = "lad19cd")) %>% 
  drop_na() %>% 
  mutate(across(c(x2021_q1:x2011_q4), as.numeric)) %>% 
  st_as_sf()
```

```{r}
# Set colours and bins
pal <- colorBin("Greens", domain = uk_ev_map$x2021_q1, bins = c(0, 500, 1000, 2500, 5000, 10000, 15000))

# Set labels
uk_ev_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Electric Vehicles",
  uk_ev_map$region_local_authority_apr_2019_3, uk_ev_map$x2021_q1) %>% 
  lapply(htmltools::HTML)
```

```{r}
# Geospatial of EV Vehicles in the UK 2021 Q1
uk_ev_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(x2021_q1),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = uk_ev_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~x2021_q1, opacity = 0.7, title = NULL,
            position = "bottomright")
```
# How many electric vehicles are on the road across the UK by LA?

```{r}
# Reading in and skipping first 5 rows
uk_ev_postcode <- read_ods(here("raw_data/ev_by_postcode.ods"), sheet = 2, skip = 6) %>% 
  rename("postcode" = "Postcode District2")
```

```{r}
# loading in shape file 
uk_shape_file_postcode <- st_read(here("raw_data/postcode_shape/EX_Sample.shp")) %>%
  clean_names() %>% 
  st_simplify(dTolerance = 1000) %>%
  st_transform("+proj=longlat +datum=WGS84") #%>% 
#  select(lad19cd, long, lat, geometry) 
```

```{r}
# Joining uk_ev + shape file 
uk_ev_map_postcode <- uk_ev_postcode %>% 
  left_join(uk_shape_file_postcode, by = c("ons_la_code_apr_2019" = "lad19cd")) %>% 
  drop_na() %>% 
  mutate(across(c(x2021_q1:x2011_q4), as.numeric)) %>% 
  st_as_sf()
```


```{r}
    pal <- colorBin("Greens", domain = uk_ev_map$x2021_q1, bins = c(0, 500, 1000, 2500, 5000, 10000, 15000))
    
    uk_ev_map_labels <- sprintf(
      "<strong>%s</strong><br/>%g Electric Vehicles",
      uk_ev_map$region_local_authority_apr_2019_3, uk_ev_map$x2021_q1) %>% 
      lapply(htmltools::HTML)
```

```{r}
# Geospatial of EV Vehicles in the UK 2021 Q1
uk_ev_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(x2021_q1),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = uk_ev_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~x2021_q1, opacity = 0.7, title = NULL,
            position = "bottomright")
```
  
```{r}
# Wrangling to create an EV count over time plot 
uk_ev_longer <- uk_ev %>%
  # Pivot longer to get year and count columns
  pivot_longer(cols = c(x2021_q1:x2011_q4), names_to = c("year"), values_to = "no_of_ev") %>% 
  # Filter so we only have UK as a whole data AND we only want final numbers of the year so Q4 
  filter(region_local_authority_apr_2019_3 == "United Kingdom" & str_detect(year, "q4")) %>% 
  # Simplify to just show year
  mutate(year = case_when(str_detect(year, "2021") ~ "2021",
         str_detect(year, "2020") ~ "2020",
         str_detect(year, "2019") ~ "2019",
         str_detect(year, "2018") ~ "2018",
         str_detect(year, "2017") ~ "2017",
         str_detect(year, "2016") ~ "2016",
         str_detect(year, "2015") ~ "2015",
         str_detect(year, "2014") ~ "2014",
         str_detect(year, "2013") ~ "2013",
         str_detect(year, "2012") ~ "2012",
         str_detect(year, "2011") ~ "2011"),
         year = as.numeric(year),
         no_of_ev = as.numeric(no_of_ev))
```


```{r}
uk_ev_longer %>% 
  ggplot() +
  aes(x = year, y = no_of_ev) +
  geom_col(fill = "lawngreen") +
  scale_x_continuous(breaks = c(2011:2020)) +
  scale_y_continuous(breaks = seq(0, 220000, by = 20000), limits = c(0, 220000)) +
  labs(title = "\nNumber of Electric Vehicles over time in the UK\n",
       x = "\nYear\n",
       y = "\nNumber of Electric Vehicles\n") +
  theme_minimal() 
```

  


# Row binding grid NO2 data 

```{r}
no2_2010 <- read_csv(here("raw_data/no2_by_grid_2010.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2011 <- read_csv(here("raw_data/no2_by_grid_2011.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2012 <- read_csv(here("raw_data/no2_by_grid_2012.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2013 <- read_csv(here("raw_data/no2_by_grid_2013.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2014 <- read_csv(here("raw_data/no2_by_grid_2014.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2015 <- read_csv(here("raw_data/no2_by_grid_2015.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2016 <- read_csv(here("raw_data/no2_by_grid_2016.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2017 <- read_csv(here("raw_data/no2_by_grid_2017.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2018 <- read_csv(here("raw_data/no2_by_grid_2018.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))
no2_2019 <- read_csv(here("raw_data/no2_by_grid_2019.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2"))

# Create empty data frame
no2_all <- data_frame()

# For each year, bind rows to one dataset
for (i in 2010:2019) {
  df_name <- paste0("no2_", i)
  df_input <- as.name(df_name)
  
  df <- eval(df_input) %>% 
    mutate(year = i)
  
no2_all <- bind_rows(no2_all, df)
  }
```

```{r}
# Remove missing NO2 grid values
no2_clean <- no2_all %>% 
  filter(no2 != "MISSING")
```

```{r}
library(proj4)
proj4string <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

no2_clean_row <- no2_clean %>% 
  rowid_to_column()

# Source data
xy <- no2_clean_row %>% 
  select(x, y, rowid)

# Transformed data
pj <- project(xy, proj4string, inverse=TRUE)
latlon <- data.frame(xy, lat=pj$y, lon=pj$x)
final <-  merge(no2_clean_row, latlon, by.x = "rowid", by.y = "rowid") %>%
  filter(year == 2019) %>% 
  select(lat, lon, no2) 
```

```{r}
final <- final %>% 
  mutate(no2 = as.numeric(no2)) 
```

```{r}
bins <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
pal <- colorBin("Spectral", domain = final$no2, bins = bins, na.color = "transparent", reverse = TRUE)

leaflet() %>%
  addProviderTiles("CartoDB.Positron", options = providerTileOptions(noWrap = TRUE)) %>%
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addHeatmap(data = final,
             lng = ~lon,
             lat = ~lat,
             intensity = ~no2,
             minOpacity = 0.1,
             max = 45,
             radius = 1,
             blur = 1) %>% 
  addLegend(pal = pal, values = final$no2,
                title="Average NO2 Conc")
```

```{r}
no2_annual_mean <- read_ods(here("raw_data/NO2_tables.ods"), sheet = 3, skip = 2) %>% 
  clean_names()
```

```{r}
no2_annual_mean_all <- no2_annual_mean %>% 
  filter(site == "All sites") %>% 
  mutate(year = as.numeric(year))
```

```{r}
no2_annual_mean_all %>% 
  ggplot() +
  aes(x = year, y = annual_mean_no2concentration_mg_m3) + 
  geom_point() +
  geom_smooth(se = FALSE) +
  theme_minimal() +
  scale_y_continuous(breaks = seq(0, 70, 10), limits = c(0, 70)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) +
  labs(x = "\nYear\n", 
       y = "\nAnnual Mean NO2 Conc mg m3\n",
       title = "\nNO2 over time in the UK\n") 
```


```{r}
library(tidyverse)
library(fable)
library(tsibble)
library(tsibbledata)
library(ggfortify)
```

```{r}
forecast_no2 <- no2_annual_mean_all %>% 
  dplyr::select(year, annual_mean_no2concentration_mg_m3) %>% 
  tsibble(index = year)
```

```{r}
autoplot(forecast_no2)
```

```{r}
fit <- forecast_no2 %>%
  model(
    arima = ARIMA(annual_mean_no2concentration_mg_m3)
  )
fit
```

```{r}
forecast_1 <- fit %>%
  fabletools::forecast(h = "15 years")
forecast_1
```

```{r}
forecast_1 %>%
  autoplot(forecast_no2, level = NULL) +
  ggtitle("Forecasts for NO2 Conc over time") +
  xlab("Year") +
  guides(colour = guide_legend(title = "Forecast")) +
  scale_y_continuous(limit = c(0, 65))
```

```{r}
# Now set our training data from 1992 to 2006
train <- forecast_no2 %>%
  filter(year <= 2017)

# run the model on the training set 
fit_train <- train %>%
  model(
    arima = ARIMA(annual_mean_no2concentration_mg_m3)
  )
```

```{r}
# forecast from the training set
forecast_test <- fit_train %>% 
  fabletools::forecast(h = "4 years")

# Plot forecasts against actual values
forecast_test %>%
  autoplot(train, level = NULL) +
    autolayer(filter(forecast_no2, year >= 2017), color = "black") +
    ggtitle("Forecasts for NO2 Conc over time") +
    xlab("Year") + ylab("Megalitres") +
    guides(colour=guide_legend(title="Forecast"))
```

```{r}
# Binding 2014 and 2019
no2_2014 <- read_csv(here("raw_data/no2_by_grid_2014.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2")) %>% 
  add_column(year = "2014")
no2_2019 <- read_csv(here("raw_data/no2_by_grid_2019.csv"), skip = 6, 
  col_names = c("uk_grid_code", "x", "y", "no2")) %>% 
  add_column(year = "2019")

no2_difference <- bind_rows(no2_2014, no2_2019)
```

```{r}
# Diff in NO2 between 2014 & 2019
no2_difference <- no2_difference %>% 
  filter(no2 != "MISSING") %>% 
  mutate(no2 = as.numeric(no2),
         year = as.numeric(year)) %>% 
  group_by(x, y) %>% 
  pivot_wider(names_from = "year", values_from = "no2") %>% 
  rename("x2014" = "2014",
         "x2019" = "2019") %>% 
  mutate(no2_diff_2014_2019 = x2014 - x2019)  %>% 
#Remove this separation when put in cleaning script
  ungroup() %>% 
#Remove this separation when put in cleaning script
  drop_na() %>% 
#Remove this separation when put in cleaning script
# Changing all negative numbers to zero as these aren't of interest to us
  mutate(no2_diff_2014_2019 = if_else(no2_diff_2014_2019 < 0, 0, no2_diff_2014_2019))
```

```{r}
# Converting NO2 diff from x y to lat long
library(proj4)
proj4string <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"

no2_diff_row <- no2_difference %>% 
  rowid_to_column()

# Source data
xy <- no2_diff_row %>% 
  dplyr::select(x, y, rowid)

# Transformed data
pj <- project(xy, proj4string, inverse=TRUE)
latlon <- data.frame(xy, lat=pj$y, lon=pj$x)
no2_diff_final <-  merge(no2_diff_row, latlon, by.x = "rowid", by.y = "rowid") %>%
  dplyr::select(lat, lon, no2_diff_2014_2019) 
```

```{r}
# Geospatial of No2 diff 2014 to 2019
bins <- c(0, 5, 10, 15, 20, 25, 30)
pal <- colorBin("Spectral", domain = no2_diff_final$no2_diff_2014_2019, bins = bins, na.color = "transparent", reverse = TRUE)

leaflet() %>%
  addProviderTiles("CartoDB.Positron", options = providerTileOptions(noWrap = TRUE)) %>%
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addHeatmap(data = no2_diff_final,
             lng = ~lon,
             lat = ~lat,
             intensity = ~no2_diff_2014_2019,
             minOpacity = 0.1,
             max = 30,
             radius = 1,
             blur = 2) %>% 
  addLegend(pal = pal, values = no2_diff_final$no2_diff_2014_2019,
                title="Annual Mean NO2 Change between 2014 and 2019")
```


```{r}
no2_difference %>% 
  ggplot() +
  geom_histogram(aes(x = no2_diff_2014_2019))
```

```{r}
ev_charging <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_02-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  dplyr::select(year, month, total_devices, rapid_devices) %>% 
  filter(rapid_devices != "NA") %>% 
  mutate(total_devices = as.numeric(total_devices),
         other_devices = total_devices - rapid_devices) %>% 
  select(-total_devices) %>% 
  pivot_longer(cols = c(other_devices, rapid_devices), names_to = "charger", values_to = "number_of_chargers") %>% 
  filter(month == "October" | month == "July" & year == "2021") %>% 
  mutate(charger = factor(charger, levels = c("rapid_devices", "other_devices")))
```

```{r}
ev_charging %>% 
  ggplot() +
  aes(x = year, y = number_of_chargers, fill = charger) +
  geom_col() +
  scale_y_continuous(breaks = seq(0, 30000, 2500)) +
  coord_flip() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 270, vjust = 0.5, hjust=1)) +
  scale_fill_manual(labels = c("Rapid Charger", "Standard Charger"), values = c("#e41a1c", "#4daf4a")) +
  labs(title = "\nNumber of Electric Vehicle Chargers Growth in the UK\n",
       x = "\nYear\n",
       y = "\nNumber of Chargers\n",
       fill = "Charger Type")
```

```{r}
ev_charging_rapid <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_01b-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  select(la_region_code, local_authority_region_name, x6) %>% 
  rename("count_rapid" = x6)
ev_charging_total <- read_csv(here("raw_data/electric-vehicle-charging-device-statistics-july-2021/EVCD_01a-Table 1.csv"), skip = 6) %>% 
  clean_names() %>% 
  select(la_region_code, local_authority_region_name, x6) %>% 
  rename("count_total" = x6)

ev_charging_geo <- inner_join(ev_charging_rapid, ev_charging_total, by = "la_region_code") %>% 
  drop_na() %>% 
  select(la_region_code, local_authority_region_name.x, count_rapid, count_total) %>% 
  filter(str_detect(local_authority_region_name.x, "[a-z][a-z]"), 
         count_rapid != "-") %>% 
  mutate(local_authority_region_name.x = str_remove_all(local_authority_region_name.x, "[(](Met County)[)]"),
         local_authority_region_name.x = str_remove_all(local_authority_region_name.x, "[(]abolished \\w+ \\d+[)]"))
```

```{r}
# Joining ev_charging_geo + shape file 
ev_charging_map <- ev_charging_geo %>% 
  inner_join(uk_shape_file, by = c("la_region_code" = "lad19cd")) %>% 
  mutate(across(c(count_rapid:count_total), as.numeric)) %>% 
  st_as_sf()
```

```{r}
# Set colours and bins
pal <- colorBin("Reds", domain = ev_charging_map$count_total, bins = 10)

# Set labels
ev_charging_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Chargers per 100k Population",
  ev_charging_map$local_authority_region_name.x, ev_charging_map$count_total, ev_charging_map$count_rapid) %>% 
  lapply(htmltools::HTML)
```

```{r}
# Geospatial of EV Chargers Per 100k people in the UK April 21
ev_charging_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(count_total),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = ev_charging_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~count_total, opacity = 0.7, title = NULL,
            position = "bottomright")
```


```{r}
# Set colours and bins
pal <- colorBin("Reds", domain = ev_charging_map$count_rapid, bins = 10)

# Set labels
ev_charging_map_labels <- sprintf(
  "<strong>%s</strong><br/>%g Chargers",
  ev_charging_map$local_authority_region_name.x, ev_charging_map$count_rapid, ev_charging_map$count_total) %>% 
  lapply(htmltools::HTML)
```

```{r}
# Geospatial of EV Rapid Chargers per 100k people in the UK April 21
ev_charging_map %>% 
  leaflet() %>% 
  setView(lng = -4.2026, lat = 55.8, zoom = 4.7, options = list()) %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(fillColor = ~pal(count_rapid),
    weight = 0.1,
    opacity = 0.9, 
    color = "black",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(color = "green", weight = 2,
                                        bringToFront = TRUE),
    label = ev_charging_map_labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto")) %>% 
  addLegend(pal = pal, values = ~count_rapid, opacity = 0.7, title = NULL,
            position = "bottomright")
```

```{r}
uk_ev_map_2021 <- uk_ev_map %>% 
  select(ons_la_code_apr_2019, region_local_authority_apr_2019_3, x2021_q1, geometry)

# filter for desired polygon
highlands <- uk_ev_map_2021 %>% 
  filter(region_local_authority_apr_2019_3 == "Highland") 

# st_join seems less dirty
ev_no2 <- final %>% 
  st_as_sf(coords = c("lon", "lat"), crs = st_crs(highlands)) %>% 
  st_join(uk_ev_map_2021, join = st_intersects, left = FALSE) %>% 
  group_by(region_local_authority_apr_2019_3) %>% 
  mutate(mean_no2 = sum(no2))
```

```{r}
ev_no2 %>% 
  ggplot() +
  aes(x = x2021_q1, y = mean_no2) +
  geom_point() +
  geom_smooth(se = TRUE)
```

```{r}
population <- read_csv(here("raw_data/ukpopestimatesmid2020on2021geography/MYE2 - Persons-Table 1.csv"), skip = 7) %>% 
  clean_names() %>% 
  select(code, name, all_ages) %>% 
  mutate(pop_per_100k = all_ages/100000)
```

```{r}
ev_no2_tbl <- tibble(ev_no2) %>% 
  select(-c(no2, geometry)) %>% 
  unique()
```


```{r}
ev_pop_100k <- ev_no2_tbl %>% 
  left_join(population, by = c("ons_la_code_apr_2019" = "code")) %>% 
  mutate(ev_per_100k = x2021_q1/pop_per_100k) 
```

```{r}
ev_pop_100k %>% 
  ggplot() +
  aes(x = ev_per_100k, y = mean_no2) +
  geom_point() +
  geom_smooth(se = TRUE)
```

```{r}
library(cluster)
library(factoextra)
```

```{r}
ev_scale <- ev_pop_100k %>% 
  select(region_local_authority_apr_2019_3, x2021_q1, mean_no2) %>% 
  mutate_if(is.numeric, scale)
```

```{r}
ev_pop_100k_scale <- ev_pop_100k %>% 
  select(region_local_authority_apr_2019_3, ev_per_100k, mean_no2) %>% 
  mutate_if(is.numeric, scale)
```

```{r}
ev_scale %>% 
  ggplot() +
  aes(x = x2021_q1, y = mean_no2) +
  geom_point() +
  geom_smooth(method = lm)
```

```{r}
ev_pop_100k_scale %>% 
  ggplot() +
  aes(x = ev_per_100k, y = mean_no2) +
  geom_point() +
  geom_smooth(method = lm)
```

```{r}
library(corrplot)
```
```{r}
ev_pop_100k_scale %>% 
  as_tibble() %>%
  pivot_longer(-region_local_authority_apr_2019_3,
               names_to = "type", 
               values_to = "value") %>% 
  drop_na() %>% 
  group_by(type) %>%
  summarise(mean = round(mean(value)), 
            sd = sd(value))
```
```{r}
ev_pop_100k_scale_numeric <- ev_pop_100k_scale %>% 
  select(-region_local_authority_apr_2019_3)
```


```{r}
corrplot(cor(ev_pop_100k_scale_numeric), method = "number", type = "lower")
```

```{r}
car_by_fuel_gb <- read_ods(here("raw_data/car_by_fuel_by_year_ALL_UK.ods"), skip = 7) %>% 
  clean_names() %>% 
  head(58) %>% 
  mutate(across(c(petrol:zero_emission8), as.numeric)) %>% 
  filter(petrol >= 100) %>% 
  mutate(traditional_fuel = total - alternative_fuels7 - zero_emission8) %>% 
  select(c("year", "petrol", "diesel", "alternative_fuels7", "zero_emission8")) %>% 
  pivot_longer(cols = c(petrol:zero_emission8), names_to = c("fuel_type"), values_to = c("count"))
```

```{r}
car_by_fuel_gb %>% 
  ggplot() +
  aes(x = year, y = count, colour = fuel_type) +
  geom_smooth(method = lm) +
  geom_point() 
```

```{r}
new_car_by_fuel_gb <- read_ods(here("raw_data/new_car_by_fuel.ods"), skip = 7) %>% 
  clean_names() %>% 
  head(21) %>% 
  mutate(across(c(petrol:zero_emission8), as.numeric)) %>% 
  filter(petrol >= 100) %>% 
  mutate(traditional_fuel = total - alternative_fuels7 - zero_emission8) %>% 
  select(c("date", "petrol", "diesel", "alternative_fuels7", "zero_emission8")) %>% 
  pivot_longer(cols = c(petrol:zero_emission8), names_to = c("fuel_type"), values_to = c("count"))
```

```{r}
new_car_by_fuel_gb %>% 
  ggplot() +
  aes(x = date, y = count, colour = fuel_type) +
  geom_line(aes(group = fuel_type)) +
  theme_minimal() +
  labs(title = "New Car Registrations by Fuel Type",
       x = "Year",
       y = "Number of Cars ('000s)",
       colour = "Fuel Type") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  scale_colour_manual(labels = c("Alternative Fuels", "Diesel", "Petrol", "Zero Emission"), values = c("#abdda4", "#d7191c", "#fdae61", "#2b83ba")) 
```

